{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 结构化数据一般使用pandas中的DataFrame进行处理"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow as tf \n",
    "from tensorflow.keras import layers,models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dftrain_raw=pd.read_csv('./data/train_csv')\n",
    "dftest_raw=pd.read_csv('./data/test_csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocessing(data):\n",
    "    df_result=pd.DataFrame()\n",
    "    #类别性one-hot编码\n",
    "    df_class=pd.get_dummies(data['Pclass'])   #编码之后为多列，因此，对两个dataframe 进行concat\n",
    "    df_calss.columns=['Pclass_'+str(x) for x in df_class.columns]\n",
    "    df_result=pd.concat([df_result,df_class],axis=1)\n",
    "    #数值型特征，并对缺失值处理\n",
    "    df_result['age']=data['age'].fillna(0)\n",
    "    df-result['age_null']=pd.isna(data['age']).astype('int32')   #返回0，1\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_train=preprocessing(dftrain_raw)\n",
    "y_train=dftrain_raw['label'].values\n",
    "\n",
    "x_test=preprocessing(dftest_raw)\n",
    "y_test=dftest_raw['label'].values\n",
    "\n",
    "print(\"x_train.shape = \",x_train.shape)\n",
    "print(\"x_test.shape = \",x_test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 定义模型\n",
    "\n",
    "1. Sequential 按层顺序构建模型\n",
    "2. 使用函数API构建任意结构模型\n",
    "3. 继承Model类构建自定义模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "model=models.Sequential()\n",
    "model.add(layers.Dense(20,activation='relu',input_shape(15,)))\n",
    "model.add(layers.Dense(10,activation='relu'))\n",
    "model.add(layers.Dense(1,activation='sigmoid'))\n",
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 训练模型\n",
    "模型训练有三种方法，内置fit方法，内置train_on_batch方法，以及自定义训练循环"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#而分类问题选择二元交叉熵损失函数\n",
    "model.compile(optimizer='adam',\n",
    "              loss='binary_crossentropy',\n",
    "              metrics='['AUC']'\n",
    "             )\n",
    "history=model.fit(x_train,y_train,\n",
    "                 batch_size=64,\n",
    "                 epochs=30,\n",
    "                 validation_split=0.2) #划分训练集和测试机"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 评估模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "def plot_metric(history,metric):\n",
    "    train_metrics=history.history[metric]\n",
    "    val_metrics=history.history[\"val\"+metric]\n",
    "    epochs=ranges(1,len(trian_metrics)+1)\n",
    "    plt.plot(epochs,train_metrics,'bo--')\n",
    "    plt.plot(epochs,val_metrics,'ro--')\n",
    "    plt.title('Training and validation'+metric)\n",
    "    plt.xlabel(\"Epochs\")\n",
    "    plt.ylabel(metric)\n",
    "    plt.legemd(\"train_\"+metric,\"val_\"+metric)\n",
    "    plt.show"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_metric(history,'loss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_metric(history,\"AUC\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 查看模型在测试集上的效果\n",
    "model.evaluate(x=x_test,y=y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 使用模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#预测概率\n",
    "model.predict(x_test[0:10])\n",
    "#预测类别\n",
    "y_pred=model.predict_classes(x_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 保存模型\n",
    "\n",
    "可以使用Keras方式保存模型，也可以使用TensorFlow原生方式保存。前者仅仅适合使用Python环境恢复模型，后者则可以跨平台进行模型部署。\n",
    "\n",
    "推荐使用后一种方式进行保存。"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.keras 方式保存模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存模型的结构及权重\n",
    "model.save(./data/keras_model.h5)\n",
    "del model #删除现有模型\n",
    "#下载模型\n",
    "model=models.load_model(./data/keras_model.h5)\n",
    "model.evaluate(x_test,y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存模型结构\n",
    "json_str=model.to_json()\n",
    "#恢复模型结构\n",
    "model_json=model.model_from_json(json_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存模型权重\n",
    "model.save_weights('./data/keras_model_weight.h5')\n",
    "#恢复模型结构\n",
    "model_json=models.model_from_json(json_str)\n",
    "model_json.compile(optimizer='adam',\n",
    "                  loss='binary_crossentropy',\n",
    "                  metrics=['AUC'])\n",
    "#加载权重\n",
    "model_json.load_weights('./data/keras_model_weight.h5')\n",
    "model_json.evaluate(x_test,y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### TensorFlow原生方式保存"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 保存模型结构与模型参数到文件,该方式保存的模型具有跨平台性便于部署\n",
    "\n",
    "model.save('./data/tf_model_savedmodel', save_format=\"tf\")\n",
    "print('export saved model.')\n",
    "\n",
    "model_loaded = tf.keras.models.load_model('./data/tf_model_savedmodel')\n",
    "model_loaded.evaluate(x_test,y_test)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
